home *** CD-ROM | disk | FTP | other *** search
/ Celestin Apprentice 5 / Apprentice-Release5.iso / Source Code / Libraries / DCLAP 6d / dclap6d / SeqPups / appsrc / autoseq.src / CPeakList.H < prev    next >
Text File  |  1996-07-05  |  8KB  |  245 lines

  1. //    ============================================================================
  2. //    CPeakList.H                                                        80 columns
  3. //    Reece Hart    (reece@ibc.wustl.edu)                                tab=4 spaces
  4. //    Washington University School of Medicine, St. Louis, Missouri
  5. //    This source is hereby released to the public domain.  Bug reports, code
  6. //    contributions, and suggestions are appreciated (to email address above).
  7. //    -    -    -    -    -    -    -    -    -    -    -    -    -    -    -    -
  8. //    This class represents a sequence of peaks picked by CTrace and is used by
  9. //    both CTrace and CTraceFile.  It uses CSequence as a base class, but adds
  10. //    many application-specific variables and methods which accommodate
  11. //    statistical analysis of the peaks.
  12. //
  13. //    Specifically, this source collects a list of peaks and computes the
  14. //    following:
  15. //        1. the equation for the exponential decay of peak heights with
  16. //            increasing index.
  17. //        2. the equation for the linear increase in peak widths with    increasing
  18. //            index.
  19. //        3. the expected fluorescence trace by modeling the fluorescence of each
  20. //            peak as a Gaussian
  21. //        4. the peak trace, which qualitatively shows the peak height and width
  22. //            and is useful for superimposing over the observed trace.
  23. //        5. the residual trace, obtained by subtracting the expected fluorescence
  24. //            from the observed fluorescence at each sample.
  25. //        6. P(H|D) from P(H), P(D|NULL), P(D|H)  (See ComputePeakStats)
  26. //
  27. //    NOTES
  28. //    *    This is the most volatile portion of this project.  For that reason,
  29. //        error handling is rudimentary.  I haven't bothered to implement access
  30. //        functions.
  31. //
  32. //    MODIFICATION HISTORY
  33. //    93.11.11    Reece    First release
  34. //    ========================================|===================================
  35.  
  36. #ifndef _H_CPeakList                        // include this file only once
  37. #define _H_CPeakList
  38.  
  39. #include  <stdlib.h>  // dgg - for size_t
  40. #include    <iostream.h>
  41. #include    <iomanip.h>
  42. //#include    "CSequence.H"
  43. #include    "CSequence.C" // dgg
  44. #include    "RInclude.H"
  45. #include    "DNA.H"
  46. #include    "Definitions.H"
  47.  
  48. template<class T>
  49. class CTrace;                                // forward declaration
  50.  
  51. struct    PeakRec
  52.     {
  53.     base_t        whichTrace;                    // which trace (A,C,G,T)
  54.     tracepos    offset;                        // leftCutoff offset
  55.     tracepos    center;                        // index of peak
  56.     double        height;                        // value at index
  57.     double        width;                        // width at 1/2 height
  58.     double        leftBound;                    // left bound at 1/2 height
  59.     double        rightBound;                    // right bound at 1/2 height
  60.     double        PH;                            // P(H)
  61.     double        PDN;                        // P(D|NULL)
  62.     double        PDH;                        // P(D|H)
  63.     double        PHD;                        // P(H|D) = P(D|H)*P(H)/P(D|NULL)
  64.  
  65.                 PeakRec(void);                // constructor
  66.  
  67.     friend
  68.     ostream&    operator<<(ostream& os, const PeakRec& pr);
  69.                 // Writes the instance of the peak object in a single line, 100
  70.                 // columns across.  See PeakRecHeader just below class.
  71.  
  72.     friend inline 
  73.     int            operator==(const PeakRec& r1, const PeakRec& r2);
  74.                 // Operator specifies conditions for two peaks to be considered
  75.                 // identical.
  76.     };
  77.  
  78.     ostream&    PeakRecHeader(ostream& os);
  79.                 // ostream manipulator which writes a line of column headings
  80.                 // for the << output operator.
  81.                 // usage:  myostream << PeakRecHeader;
  82.  
  83.  
  84. class CPeakList : public CSequence<PeakRec>
  85.     {
  86.     private:
  87.     static vrsn    version;
  88.  
  89.     // inherit CSequence variables
  90.     public:
  91.     double        hmean;                        // peak height mean
  92.     double        hvariance;                    //   and variance
  93.  
  94.     double        hm0;                        // height mean @ i=0
  95.     double        hmdecay;                    // hm exp. decay
  96.  
  97.     double        hv0;                        // height variance @ i=0
  98.     double        hvdecay;                    // hv exp. decay
  99.  
  100.     double        w0;                            // width modeled by least sq. fit
  101.     double        wconst;                        // w(i)=w0 + wconst*i
  102.  
  103.     private:
  104.     uint        group1_left;                // group 1 = first third
  105.     uint        group1_right;
  106.     tracepos    group1_leftidx;
  107.     tracepos    group1_rightidx;
  108.     double        group1_hmean;
  109.     double        group1_hvariance;
  110.     double        group1_wd_mean;
  111.     double        group1_index_mean;
  112.  
  113.     uint        group2_left;                // group 2 = second third
  114.     uint        group2_right;
  115.     tracepos    group2_leftidx;
  116.     tracepos    group2_rightidx;
  117.     double        group2_hmean;
  118.     double        group2_wd_mean;
  119.     double        group2_hvariance;
  120.     double        group2_index_mean;
  121.  
  122.  
  123.         // dgg -- these three where private !
  124.     public: 
  125.     CTrace<double>* FTrace;                    // computed fluorescence trace
  126.     CTrace<double>* PTrace;                    // trace of peak widths/heights
  127.     CTrace<double>* RTrace;                    // trace of residuals
  128.     
  129.     public:
  130.     vrsn&        Version();                    // return version # of class
  131.  
  132.                 CPeakList(void);            // constructor
  133.                 ~CPeakList(void);            // destructor
  134.  
  135.  
  136.     char*        Sequence(char* buf=NULL);    // return sequence
  137.  
  138.     inline
  139.     tracepos    Delta(ulong n, ushort nbhd=1);
  140.                 // The delta between bases m and n is defined to be the distance
  141.                 // between the centers of their corresponding peaks m and n in
  142.                 // the trace.  By default, it returns the delta for base n;
  143.                 // specifying nbhd returns the delta for the nbhd 3'-most bases.
  144.  
  145.     inline
  146.     void        RemovePeak(tracepos peakCenter);
  147.                 // Removes a peak from a trace.  For the time being, it does
  148.                 // so only on the basis of peak center.
  149.  
  150.     bool        Analyze(CTrace<trace_t>& ct);// Automates the following:
  151.                 // Automates the process of computing peak probabilities from a
  152.                 // peak list.  It generates the fluorescence and residual traces
  153.                 // and (permanently) stores the results in the FTrace and RTrace
  154.                 // members.
  155.  
  156.     void        CalculateStats(void);        // compute peak statistics
  157.                 // Computes statistics for the set of peaks in the list.
  158.                 // Specifically, it computes:
  159.                 //   1. the overall mean and variance of peak heights
  160.                 //   2. the linear increase of peak width with index
  161.                 //   3. the exponential equation decay of mean peak height decay
  162.  
  163.     void        CalculatePeakStats(double h_baseline);
  164.                 // Computes the individual peak probabilities using a Gaussian
  165.                 // model for the peak fluorescence, height distribution, and 
  166.                 // noise estimate, and Bayes' Theorem to compute the probability
  167.                 // of a particular peak given the observations.
  168.  
  169.     void        WriteDeltas(ostream& os, uint nbhd, bool header=FALSE);
  170.                 // For every subsequence of length nbhd, write the index of the
  171.                 // 3' most peak center for that subsequence, the subsequence
  172.                 // itself, and the delta for the subsequence.  See Delta(...)
  173.                 // If header is TRUE, a simple column header is written first.
  174.  
  175.     void        Offset(tracepos offset);
  176.                 // Horizontally translates all references to trace indices by
  177.                 // offset (offset added to each index).  Useful for defining the
  178.                 // zero point (ie. the primer position).
  179.  
  180.     CTrace<double>*
  181.                 ComputeFTrace(size_t sz,tracepos extent=50);
  182.                 // Computes a trace which represents the expected fluorescence
  183.                 // from the list of peaks by assuming a Gaussian distribution at
  184.                 // each peak (using the center, height, and width fields of the
  185.                 // PeakRec).  The extent parameter specifies the horizontal
  186.                 // extent of the Gaussian on each side of center.  The size of
  187.                 // the original trace must be passed so that the original and
  188.                 // fluorescence traces will be the same size.
  189.  
  190.     CTrace<double>*
  191.                 ComputePTrace(CTrace<trace_t>& src_trace);
  192. //                ComputePTrace(size_t sz);
  193.                 // Generates a trace which shows peak height and width for every
  194.                 // peak in the peak list. The argument passed to ComputePTrace
  195.                 // is the size of the original trace, which will also be the
  196.                 // size of the peak trace.  Peak traces are especially
  197.                 // informative when overlaid with the trace from which the peaks
  198.                 // were picked.
  199.  
  200.     CTrace<double>*
  201.                 ComputeRTrace(CTrace<trace_t>& src_trace);
  202.                 // Generates a new trace of residuals (data that cannot be
  203.                 // explained by peaks) by subtracting the computed FTrace from
  204.                 // the original trace.  A pointer to the result is stored in
  205.                 // RTrace and returned (or NULL if error).
  206.  
  207.  
  208.     friend
  209.     ostream&    operator<<(ostream& os, CPeakList& cpl);
  210.                 //    Display a summary of the peak list and it's members
  211.     };
  212.  
  213.  
  214. inline
  215. vrsn&
  216. CPeakList::Version()
  217.     {
  218.     return version;
  219.     }
  220.  
  221. inline
  222. void
  223. CPeakList::RemovePeak(
  224.     tracepos peakCenter)
  225.     {
  226.     for(ulong index=0;index<size;index++)
  227.         if ((*this)[index].center == peakCenter)
  228.             { Remove(index); break; }
  229.     }
  230.  
  231. inline
  232. tracepos
  233. CPeakList::Delta(ulong n, ushort nbhd)
  234.     {
  235.     if ((n-nbhd<0) || (n>size-1))
  236.         return 0;
  237.     else
  238.         return (*this)[n].center-(*this)[n-nbhd].center;
  239.     }
  240.  
  241. inline int operator==( const PeakRec& r1, const PeakRec& r2)
  242.     { return (r1.center == r2.center); }
  243.  
  244. #endif                                        // conditional inclusion
  245.